Problem with logFC: Poor identification of changes at high expression levels

Experiment: Translate the dataset 10 units at a time and see how the logFC values change

get_logFC = function(datExpr, trans){
  
  datExpr_SFARI = datExpr %>% filter(rownames(datExpr) %in% SFARI_genes$ID)
  rownames(datExpr_SFARI) = rownames(datExpr)[rownames(datExpr) %in% SFARI_genes$ID]
  datExpr_translated = datExpr_SFARI + 10*trans
  datProbes_SFARI = datProbes %>% filter(rownames(datExpr) %in% SFARI_genes$ID)
  counts = as.matrix(datExpr_translated)
  rowRanges = GRanges(datProbes_SFARI$chromosome_name,
                      IRanges(datProbes_SFARI$start_position, width=datProbes_SFARI$length),
                      strand=datProbes_SFARI$strand,
                      feature_id=datProbes_SFARI$ensembl_gene_id)
  
  se = SummarizedExperiment(assays=SimpleList(counts=counts), rowRanges=rowRanges, colData=datMeta)
  ddsSE = DESeqDataSet(se, design =~Diagnosis_)
  
  dds = DESeq(ddsSE)
  DE_info = results(dds) %>% data.frame %>% rownames_to_column(var = 'ID') %>%
            mutate('logFC'=log2FoldChange, 'translation'=10*trans) %>% 
            dplyr::select(ID, logFC, padj, translation)
  
  return(DE_info)
}

logFC_results = get_logFC(datExpr, 0)

for(trans in seq(1,10)){
  new = get_logFC(datExpr, trans)
  logFC_results = bind_rows(logFC_results, new)
}

p1 = ggplotly(ggplot(logFC_results, aes(translation, abs(logFC), group=translation, fill=translation)) + 
              geom_boxplot() + theme_minimal())

p2 = ggplotly(ggplot(logFC_results, aes(translation, padj, group=translation, fill=translation)) + 
              geom_boxplot() + theme_minimal() + 
              ggtitle('LogFC (left) and adj p-val (right) for different translations of the data'))

subplot(p1, p2, nrows=1)
significant = logFC_results %>% filter(abs(logFC)>log2(1.2) & padj<0.05) %>%  group_by(translation) %>% tally

ggplotly(ggplot(significant, aes(translation, n, fill=translation)) + geom_bar(stat='identity') +
         theme_minimal() + ggtitle('Number of DE genes for each translation'))
rm(new, p1, p2, logFC_results, significant)

Replicate the behaviour for the different SFARI scores

The gene expression grouped by SFARI could be approximated by a logNormal distribution … kind of…

all_points = datExpr %>% mutate('ID' = rownames(datExpr)) %>% left_join(SFARI_genes, by='ID') %>%
            filter(!is.na(`gene-score`)) %>% mutate(`gene-score` = as.factor(`gene-score` )) %>% 
            dplyr::select(c(colnames(datExpr), `gene-score`)) %>% melt
## Using gene-score as id variables
ggplotly(ggplot(all_points, aes(value, group=`gene-score`, fill=`gene-score`, color=`gene-score`)) + 
         geom_density(alpha=0.3, color=NA) + scale_x_continuous(trans='log2') + theme_minimal() +
         ggtitle('Gene expression grouped by SFARI score'))